{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "386dbd82",
   "metadata": {},
   "source": [
    "# Enhance Microsoft Copilot with Elasticsearch\n",
    "\n",
    "This notebook runs an API that lets you search for invoices stored in Elasticsearch and opens an ngrok tunnel to expose that API to the internet. It is based on the article [Enhance Microsoft Copilot with Elasticsearch](https://www.elastic.co/blog/enhance-microsoft-copilot-with-elasticsearch)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d460f865",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install fastapi pyngrok uvicorn nest-asyncio elasticsearch==9 -q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ac47371",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from getpass import getpass\n",
    "from datetime import datetime\n",
    "\n",
    "import nest_asyncio\n",
    "import uvicorn\n",
    "\n",
    "from fastapi import FastAPI, HTTPException, Query\n",
    "from pyngrok import conf, ngrok\n",
    "\n",
    "from elasticsearch.helpers import bulk\n",
    "from elasticsearch import Elasticsearch"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "64167eee",
   "metadata": {},
   "source": [
    "## Setup Variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa378fdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt for each credential with getpass and store the answer in the process\n",
    "# environment under the matching variable name.\n",
    "for env_name, prompt_text in (\n",
    "    (\"ELASTICSEARCH_ENDPOINT\", \"Elastic Endpoint: \"),\n",
    "    (\"ELASTICSEARCH_API_KEY\", \"Elastic Api Key: \"),\n",
    "    (\"NGROK_AUTH_TOKEN\", \"Ngrok Auth Token: \"),\n",
    "):\n",
    "    os.environ[env_name] = getpass(prompt_text)\n",
    "\n",
    "\n",
    "# Name of the Elasticsearch index that holds the invoice documents.\n",
    "INDEX_NAME = \"invoices\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "31041b60",
   "metadata": {},
   "source": [
    "## Elasticsearch client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d8a8201",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the client from the endpoint and API key stored in the environment.\n",
    "_client = Elasticsearch(\n",
    "    hosts=os.environ[\"ELASTICSEARCH_ENDPOINT\"],\n",
    "    api_key=os.environ[\"ELASTICSEARCH_API_KEY\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "07578680",
   "metadata": {},
   "source": [
    "## Mappings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c34a804a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the invoices index. `description` and `services.name` are copied into\n",
    "# `semantic_field`, the `semantic_text` field queried by the semantic search.\n",
    "try:\n",
    "    _client.indices.create(\n",
    "        index=INDEX_NAME,\n",
    "        body={\n",
    "            \"mappings\": {\n",
    "                \"properties\": {\n",
    "                    \"id\": {\"type\": \"keyword\"},\n",
    "                    \"file_url\": {\"type\": \"keyword\"},\n",
    "                    \"issue_date\": {\"type\": \"date\"},\n",
    "                    \"description\": {\"type\": \"text\", \"copy_to\": \"semantic_field\"},\n",
    "                    \"services\": {\n",
    "                        \"type\": \"object\",\n",
    "                        \"properties\": {\n",
    "                            \"name\": {\n",
    "                                \"type\": \"text\",\n",
    "                                \"copy_to\": \"semantic_field\",\n",
    "                            },\n",
    "                            \"price\": {\"type\": \"float\"},\n",
    "                        },\n",
    "                    },\n",
    "                    \"total_amount\": {\n",
    "                        \"type\": \"float\",\n",
    "                    },\n",
    "                    \"semantic_field\": {\"type\": \"semantic_text\"},\n",
    "                }\n",
    "            }\n",
    "        },\n",
    "    )\n",
    "\n",
    "    print(\"index created successfully\")\n",
    "except Exception as e:\n",
    "    # Not every exception is guaranteed to expose an `info` payload with this\n",
    "    # shape (e.g. a connection failure), so fall back to the exception text\n",
    "    # instead of failing again inside the handler.\n",
    "    error_info = getattr(e, \"info\", None)\n",
    "    try:\n",
    "        reason = error_info[\"error\"][\"root_cause\"][0][\"reason\"]\n",
    "    except (TypeError, KeyError, IndexError):\n",
    "        reason = str(e)\n",
    "\n",
    "    print(f\"Error creating index: {reason}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "02a2c25a",
   "metadata": {},
   "source": [
    "## Ingesting documents to Elasticsearch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "69f388c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the invoice documents from the local JSON file.\n",
    "with open(\"invoices_data.json\", encoding=\"utf-8\") as f:\n",
    "    invoices = json.loads(f.read())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b96c42fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_data():\n",
    "    \"\"\"Yield one bulk action per loaded invoice, targeting INDEX_NAME.\"\"\"\n",
    "    yield from ({\"_index\": INDEX_NAME, \"_source\": invoice} for invoice in invoices)\n",
    "\n",
    "\n",
    "# Send every invoice in one bulk request and report the outcome.\n",
    "try:\n",
    "    success, errors = bulk(_client, build_data())\n",
    "except Exception as exc:\n",
    "    print(f\"Error: {exc!s}, please wait some seconds and try again.\")\n",
    "else:\n",
    "    print(f\"{success} documents indexed successfully\")\n",
    "\n",
    "    if errors:\n",
    "        print(\"Errors during indexing:\", errors)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d38c1869",
   "metadata": {},
   "source": [
    "## Building API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ad221fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "app = FastAPI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76106dad",
   "metadata": {},
   "outputs": [],
   "source": [
    "@app.get(\"/search/semantic\")\n",
    "async def search_semantic(query: str = Query(None)):\n",
    "    \"\"\"Run a semantic search against the invoices index.\n",
    "\n",
    "    Args:\n",
    "        query: Free-text search string matched against `semantic_field`.\n",
    "\n",
    "    Returns:\n",
    "        A list with one dict per hit: the hit's `_source` fields plus its\n",
    "        relevance under the `score` key.\n",
    "\n",
    "    Raises:\n",
    "        HTTPException: 400 if `query` is missing or blank; 500 if the\n",
    "            Elasticsearch request or the response handling fails.\n",
    "    \"\"\"\n",
    "    # `Query(None)` makes the parameter optional, so reject empty input here\n",
    "    # rather than forwarding a null query to Elasticsearch.\n",
    "    if query is None or not query.strip():\n",
    "        raise HTTPException(\n",
    "            status_code=400, detail=\"The 'query' parameter is required.\"\n",
    "        )\n",
    "\n",
    "    try:\n",
    "        result = _client.search(\n",
    "            index=INDEX_NAME,\n",
    "            query={\n",
    "                \"semantic\": {\n",
    "                    \"field\": \"semantic_field\",\n",
    "                    \"query\": query,\n",
    "                }\n",
    "            },\n",
    "        )\n",
    "\n",
    "        hits = result[\"hits\"][\"hits\"]\n",
    "        results = [{\"score\": hit[\"_score\"], **hit[\"_source\"]} for hit in hits]\n",
    "\n",
    "        return results\n",
    "    except Exception as e:\n",
    "        # A returned Exception object is treated as ordinary response content,\n",
    "        # not as an error; raise so the client receives an HTTP error status.\n",
    "        raise HTTPException(status_code=500, detail=f\"Error: {str(e)}\") from e\n",
    "\n",
    "\n",
    "@app.get(\"/search/by-date\")\n",
    "async def search_by_date(from_date: str = Query(None), to_date: str = Query(None)):\n",
    "    \"\"\"Return the invoices whose `issue_date` falls within a date range.\n",
    "\n",
    "    Args:\n",
    "        from_date: Range start, formatted as `%m/%d/%Y %I:%M:%S %p`.\n",
    "        to_date: Range end, in the same format.\n",
    "\n",
    "    Returns:\n",
    "        A list with the `_source` of every matching invoice.\n",
    "\n",
    "    Raises:\n",
    "        HTTPException: 400 if either date is missing or does not match the\n",
    "            expected format; 500 if the Elasticsearch request fails.\n",
    "    \"\"\"\n",
    "    input_format = \"%m/%d/%Y %I:%M:%S %p\"\n",
    "\n",
    "    # Both parameters are declared optional via `Query(None)`, so validate\n",
    "    # them explicitly before parsing.\n",
    "    if from_date is None or to_date is None:\n",
    "        raise HTTPException(\n",
    "            status_code=400,\n",
    "            detail=\"Both 'from_date' and 'to_date' parameters are required.\",\n",
    "        )\n",
    "\n",
    "    try:\n",
    "        from_dt = datetime.strptime(from_date, input_format)\n",
    "        to_dt = datetime.strptime(to_date, input_format)\n",
    "    except ValueError as e:\n",
    "        # Malformed input is a client error, not a server failure.\n",
    "        raise HTTPException(status_code=400, detail=f\"Error: {str(e)}\") from e\n",
    "\n",
    "    # Only the date part is sent; the range query is told to parse it with\n",
    "    # the matching `dd/MM/yyyy` format.\n",
    "    formatted_from = from_dt.strftime(\"%d/%m/%Y\")\n",
    "    formatted_to = to_dt.strftime(\"%d/%m/%Y\")\n",
    "\n",
    "    try:\n",
    "        result = _client.search(\n",
    "            index=INDEX_NAME,\n",
    "            query={\n",
    "                \"range\": {\n",
    "                    \"issue_date\": {\n",
    "                        \"gte\": formatted_from,\n",
    "                        \"lte\": formatted_to,\n",
    "                        \"format\": \"dd/MM/yyyy\",\n",
    "                    }\n",
    "                }\n",
    "            },\n",
    "        )\n",
    "\n",
    "        hits = result[\"hits\"][\"hits\"]\n",
    "        results = [hit[\"_source\"] for hit in hits]\n",
    "\n",
    "        return results\n",
    "    except Exception as e:\n",
    "        # A returned Exception object is treated as ordinary response content,\n",
    "        # not as an error; raise so the client receives an HTTP error status.\n",
    "        raise HTTPException(status_code=500, detail=f\"Error: {str(e)}\") from e"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf1460e9",
   "metadata": {},
   "source": [
    "## Running the API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "517c85c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One value for the port so the ngrok tunnel and the uvicorn server agree.\n",
    "api_port = 8000\n",
    "\n",
    "# Authenticate pyngrok, then open a tunnel to the local port.\n",
    "conf.get_default().auth_token = os.environ[\"NGROK_AUTH_TOKEN\"]\n",
    "ngrok_tunnel = ngrok.connect(api_port)\n",
    "\n",
    "print(\"Public URL:\", ngrok_tunnel.public_url)\n",
    "\n",
    "# NOTE(review): nest_asyncio is presumably applied because the notebook kernel\n",
    "# already runs an event loop - confirm before removing.\n",
    "nest_asyncio.apply()\n",
    "uvicorn.run(app, port=api_port)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ccffd29a",
   "metadata": {},
   "source": [
    "## Delete the index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "991ba4e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_results(results):\n",
    "    \"\"\"Print the outcome of an index deletion request.\n",
    "\n",
    "    Args:\n",
    "        results: Response of the delete call, read like a mapping: the\n",
    "            `acknowledged` flag marks success and an `error` entry carries\n",
    "            the failure reason.\n",
    "    \"\"\"\n",
    "    if results.get(\"acknowledged\", False):\n",
    "        print(\"DELETED successfully.\")\n",
    "\n",
    "    if \"error\" in results:\n",
    "        print(f\"ERROR: {results['error']['root_cause'][0]['reason']}\")\n",
    "\n",
    "\n",
    "# Cleanup - Delete Index\n",
    "# Passing `ignore=` directly to the API call is the deprecated per-request form;\n",
    "# `options(ignore_status=...)` is the supported way to have 400/404 responses\n",
    "# returned instead of raised.\n",
    "result = _client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)\n",
    "print_results(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
